#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 29 15:03:02 2022

@author: cythnia
"""
#————————————————————————————————————————————#
#练习：爬取网易新闻分省份疫情数据（selenium）
#—————————————————————————————————————————————#
#导入工具包
import pandas as pd
import numpy as np
import time 
from selenium import webdriver
from selenium.webdriver.common.by import By

# ------------------------------------------------------------------ #
# Scrape NetEase News per-province COVID-19 figures with Selenium and
# save them to an Excel workbook, then print the city-level entries
# of the first province in the list.
# ------------------------------------------------------------------ #

# Page to scrape.
url='https://wp.m.163.com/163/page/news/virus_report/index.html?_nw_=1&_anw_=1'

# CSS prefix shared by every per-province cell; the column-specific
# `span.item_*` suffix is appended to it for each lookup below.
_ROW_PREFIX = 'body > div.common-container > div:nth-child(2) > div.wrap > ul > li > div > '
# City-level spans nested under the first province entry.
_CITY_SELECTOR = 'body > div.common-container > div:nth-child(2) > div.wrap > ul > li:nth-child(1) > ul > li > span'


def _find_all(selector):
    """Return every element on the current page matching a CSS selector.

    Uses the locator-based ``find_elements`` API; the older
    ``find_elements_by_css_selector`` helper no longer exists in
    Selenium >= 4.3.
    """
    return driver.find_elements(By.CSS_SELECTOR, selector)


# Start the browser driver.
driver=webdriver.Chrome()
# Rows collected from the page, one list of cell texts per province.
lis=[]
try:
    # Implicit wait is not a sleep: it is the maximum time each element
    # lookup keeps polling the DOM before giving up. Set it before loading
    # the page so every lookup below is covered.
    driver.implicitly_wait(10)
    driver.get(url)

    # Province names.
    shengfen=_find_all(_ROW_PREFIX + 'span.item_name')
    # NOTE(review): the class is `item_newconfirm` but the Excel column is
    # labelled "现有确诊人数" (currently confirmed) — confirm which one the
    # page actually reports.
    xianyouquezhen=_find_all(_ROW_PREFIX + 'span.item_newconfirm')
    # Cumulative confirmed cases.
    lishiquezhen=_find_all(_ROW_PREFIX + 'span.item_confirm')
    # Deaths; echoed to stdout as a quick sanity check of the scrape.
    siwang=_find_all(_ROW_PREFIX + 'span.item_dead')
    for i in siwang:
        print(i.text)
    # Recoveries.
    zhiyu=_find_all(_ROW_PREFIX + 'span.item_heal')

    # Combine the five parallel columns into rows. zip() stops at the
    # shortest column, so a partially rendered page yields fewer rows
    # rather than misaligned ones.
    for shengfens,xianyouquezhens,lishiquezhens,siwangs,zhiyus in zip(shengfen,xianyouquezhen,lishiquezhen,siwang,zhiyu):
        lis.append([shengfens.text,xianyouquezhens.text,lishiquezhens.text,siwangs.text,zhiyus.text])

    # Build the table and write it out.
    result=pd.DataFrame(lis,columns=['地区','现有确诊人数','历史确诊人数','死亡人数','治愈人数'])
    result.to_excel('/Users/cythnia/Desktop/4.29分省份疫情数据.xlsx')

    # -------------------------------------------------------------- #
    # City-level data for the first province in the list.
    # -------------------------------------------------------------- #
    shi=_find_all(_CITY_SELECTOR)
    # `shi` is a list of elements, so print each element's text; the list
    # itself has no `.text` attribute.
    for shis in shi:
        print(shis.text)
finally:
    # Always shut the browser down, even if scraping or saving failed,
    # so no Chrome/chromedriver process is left running.
    driver.quit()